import os
import sys
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow import keras
import tensorflow as tf  # tf版本为1.13
from tensorflow import keras
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import warnings

# Suppress all Python warnings for the rest of the script so the training
# log stays readable.
warnings.filterwarnings('ignore')

# Hide every CUDA device from TensorFlow so the script runs on the CPU only.
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Load the raw table. The file is decoded as GBK; its column names are
# Chinese (e.g. '金属制品业', which is used below as the forecasting target).
# NOTE: leftover debug prints and the commented-out preprocessing of an
# unrelated air-quality dataset were removed from this section.
data = pd.read_csv("../data/econ/table2.csv", encoding='gbk')

# Build the supervised-learning frame expected by the LSTM: each row keeps
# the feature columns of the current step, and the target ``y`` is the
# '金属制品业' value of the *next* row (one-step-ahead forecasting).

new_data = data.copy().drop(columns=['Unnamed: 0'])
new_data['y'] = new_data['金属制品业'].shift(-1)
# The source column is removed once it has been turned into the target, and
# the final row is discarded because shift(-1) leaves its target as NaN.
new_data = new_data.drop(columns=['金属制品业']).iloc[:-1]

# Draw every column (features and target) in its own stacked subplot.
plt_cols = new_data.columns
values = new_data[plt_cols].values
n_panels = len(plt_cols)
plt.figure(figsize=(25, 40))
for idx, col_name in enumerate(plt_cols):
    plt.subplot(n_panels, 1, idx + 1)
    plt.plot(values[:, idx])
    plt.title(col_name, y=0.5, loc='right')
plt.show()
# Chronological split point: the first ``train_size`` rows are used for
# training and every remaining row for testing.
train_size = 150
if not 0 < train_size < len(new_data):
    raise ValueError(
        "train_size must leave at least one row for both training and "
        "testing; got train_size=%d for %d rows" % (train_size, len(new_data)))

# Min-max scale each feature column independently. The target ``y`` is
# deliberately left in its original units, so model outputs need no inverse
# scaling. Each scaler is fit on the training rows only and then applied to
# the whole column, so no statistics from the test rows leak into training.
# NOTE: after the loop ``mms`` is the scaler of the *last feature column*;
# it is not a scaler for ``y``.
for col in new_data.columns:
    if col == 'y':
        continue
    mms = MinMaxScaler()
    print(col)
    column = new_data[col].values.reshape(-1, 1)
    mms.fit(column[:train_size])
    new_data[col] = mms.transform(column).ravel()
print(new_data.head())

# Separate target and features, then split them in time order.
y = new_data.pop('y')
X = new_data
X_train = X[:train_size]
X_test = X[train_size:]
y_train = y[:train_size]
y_test = y[train_size:]

# Keras LSTM layers expect 3-D input (samples, timesteps, features); every
# sample here is a single timestep.
X_train = X_train.values.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = X_test.values.reshape(X_test.shape[0], 1, X_test.shape[1])
print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("X_test:", X_test.shape, "y_test:", y_test.shape)

# Model: one LSTM layer feeding a small dense regression head. Per the
# original author's note, the hyper-parameters follow the reference blog
# post this script was adapted from.

with tf.device("/cpu"):
    n_steps, n_features = X_train.shape[1], X_train.shape[2]
    model = keras.models.Sequential([
        keras.layers.LSTM(64, input_shape=(n_steps, n_features)),
        keras.layers.Dense(32, activation="relu"),
        keras.layers.Dense(16, activation="relu"),
        keras.layers.Dense(1),
    ])
    model.compile(optimizer='adam', loss='mse')
    # The test split is passed as validation data, so ``history`` records a
    # 'val_loss' curve next to the training 'loss'.
    history = model.fit(
        X_train, y_train,
        validation_data=(X_test, y_test),
        batch_size=1,
        epochs=200,
    )


# 省去训练log
# 误差可视化
def plot_learning_curves(history, label, epochs, min_value, max_value, title):
    """Plot the training and validation curves of one recorded metric.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch value sequences (e.g. the return value of
            ``model.fit``).
        label: Key of the training metric; ``'val_' + label`` must also be
            present and is plotted as the 'test' curve.
        epochs: Upper limit of the x-axis.
        min_value: Lower limit of the y-axis.
        max_value: Upper limit of the y-axis.
        title: Title of the figure.
    """
    metrics = history.history
    curves = pd.DataFrame({
        'train': metrics[label],
        'test': metrics['val_' + label],
    })
    curves.plot(figsize=(8, 5))
    plt.grid(True)
    plt.axis([0, epochs, min_value, max_value])
    plt.title(title)
    plt.show()


# Plot the full loss history. The axis limits are derived from the recorded
# curves instead of hard-coded numbers, and the title names the loss that is
# actually optimised (the model is compiled with loss='mse').
loss_curves = history.history['loss'] + history.history['val_loss']
plot_learning_curves(history, 'loss', len(history.history['loss']),
                     0, max(loss_curves), 'MSE')

# Evaluate the predictions with RMSE.
# The target ``y`` is never scaled in this script, so the model already
# predicts in the target's original units. ``mms`` is the scaler of the last
# *feature* column; applying its inverse transform to predictions and targets
# would distort both and the reported RMSE, so no inverse transform is used.
# The ``inv_*`` / ``*_y_true`` names are kept for the plotting code below.
pred_train = model.predict(X_train)
inv_train = pred_train
train_y_true = y_train.values.reshape(-1, 1)
rmse = np.sqrt(mean_squared_error(train_y_true, inv_train))
print("train rmse:", rmse)

pred_test = model.predict(X_test)
inv_pred = pred_test
test_y_true = y_test.values.reshape(-1, 1)
rmse = np.sqrt(mean_squared_error(test_y_true, inv_pred))

print("test rmse:", rmse)

# Visual comparison of true values and predictions.
train_len = len(pred_train)
pred_len = len(test_y_true)

# Training split: true values vs. predictions.
plt.plot(range(train_len), train_y_true, label="true")
plt.plot(range(train_len), inv_train, label="pred")
plt.legend()
plt.show()

# Test split: true values vs. predictions.
plt.plot(range(pred_len), test_y_true, label="true")
plt.plot(range(pred_len), inv_pred, label="pred")
plt.legend()
plt.show()

# Scatter view of the first test samples. The window is clamped to the size
# of the test set: plt.scatter requires x and y of equal length, so a fixed
# window of 60 fails whenever fewer than 60 test rows exist.
size = min(60, pred_len)
plt.scatter(range(size), test_y_true[:size], label="true")
plt.scatter(range(size), inv_pred[:size], label="pred")
plt.legend()
plt.show()
